{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load Labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tqdm import tqdm_notebook as tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Twitter15 input files, given as paths relative to the working directory.\n",
    "# label.txt is parsed by load_labels as one 'label:id' entry per line.\n",
    "# source_tweets.txt presumably holds the source tweet texts -- TODO confirm.\n",
    "twitter15_label_file = '../twitter15/label.txt'\n",
    "twitter15_text_file = '../twitter15/source_tweets.txt'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_labels(file):\n",
    "    \"\"\"Parse a ``label:id`` text file into a ``{id: label}`` dictionary.\n",
    "\n",
    "    Every non-blank line is stripped and split on ``:``; the first field is\n",
    "    the label and the second field is the id, e.g.\n",
    "    ``unverified:731166399389962242``.\n",
    "\n",
    "    Args:\n",
    "        file: Path of the label file to read.\n",
    "\n",
    "    Returns:\n",
    "        dict: Maps each id (converted with ``int``) to its label string.\n",
    "        When an id appears on several lines, the last line wins.\n",
    "\n",
    "    Raises:\n",
    "        OSError: If the file cannot be opened.\n",
    "        IndexError: If a non-blank line has no ``:`` separator.\n",
    "        ValueError: If the id field is not a valid integer.\n",
    "    \"\"\"\n",
    "    labels = {}\n",
    "\n",
    "    # The context manager closes the handle even if a line fails to parse.\n",
    "    with open(file, 'r') as f:\n",
    "        for line in f:\n",
    "            line = line.strip()\n",
    "            if not line:\n",
    "                # Tolerate blank lines such as a trailing empty line.\n",
    "                continue\n",
    "            fields = line.split(':')\n",
    "            labels[int(fields[1])] = fields[0]\n",
    "\n",
    "    return labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the Twitter15 lookup: integer id -> label string, read from the label file.\n",
    "twitter15_labels = load_labels(twitter15_label_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{731166399389962242: 'unverified',\n",
       " 714598641827246081: 'unverified',\n",
       " 691809004356501505: 'non-rumor',\n",
       " 693204708933160960: 'non-rumor',\n",
       " 551099691702956032: 'true',\n",
       " 767223774072676354: 'non-rumor',\n",
       " 715515982584881152: 'unverified',\n",
       " 514106273852174337: 'true',\n",
       " 500319801344929795: 'unverified',\n",
       " 495366618818830336: 'false',\n",
       " 532206910796468224: 'false',\n",
       " 560187970389819392: 'false',\n",
       " 531568534066057217: 'false',\n",
       " 489829414704648192: 'false',\n",
       " 524925730053181440: 'unverified',\n",
       " 766989078294306816: 'non-rumor',\n",
       " 499530130487017472: 'unverified',\n",
       " 520284654755381249: 'false',\n",
       " 767515401831997440: 'non-rumor',\n",
       " 565999191982616577: 'true',\n",
       " 554343513887105024: 'false',\n",
       " 767715489485205505: 'non-rumor',\n",
       " 553467311261503488: 'unverified',\n",
       " 553960736964476928: 'false',\n",
       " 500303431928922113: 'unverified',\n",
       " 538900739880665088: 'unverified',\n",
       " 516420964834611201: 'unverified',\n",
       " 80080680482123777: 'false',\n",
       " 687945926774800385: 'non-rumor',\n",
       " 436089462326849536: 'false',\n",
       " 568589712644026368: 'false',\n",
       " 767360696761905153: 'non-rumor',\n",
       " 407231591191035904: 'true',\n",
       " 505369323922522113: 'false',\n",
       " 576513463738109954: 'unverified',\n",
       " 689214940679409664: 'non-rumor',\n",
       " 522782267215982592: 'unverified',\n",
       " 387233034350514176: 'false',\n",
       " 693554344298467328: 'non-rumor',\n",
       " 706665777332621314: 'unverified',\n",
       " 531530591427391488: 'unverified',\n",
       " 532199717594497024: 'false',\n",
       " 503498696273965056: 'unverified',\n",
       " 553184482241814530: 'unverified',\n",
       " 692838349036326912: 'non-rumor',\n",
       " 580320684305416192: 'unverified',\n",
       " 514057033419849728: 'true',\n",
       " 693696168908423168: 'non-rumor',\n",
       " 550284252672450560: 'unverified',\n",
       " 407167258452971520: 'true',\n",
       " 509464457836511232: 'true',\n",
       " 499612545909415938: 'unverified',\n",
       " 692892472729415680: 'non-rumor',\n",
       " 527131140721623040: 'unverified',\n",
       " 726043971911213057: 'unverified',\n",
       " 730939370765754368: 'unverified',\n",
       " 693806234026188801: 'non-rumor',\n",
       " 692082337333534720: 'non-rumor',\n",
       " 551117430345711616: 'false',\n",
       " 407161635439013888: 'true',\n",
       " 561280944045576192: 'false',\n",
       " 723365789378584578: 'unverified',\n",
       " 532106726539939841: 'false',\n",
       " 689719109785907200: 'non-rumor',\n",
       " 561952533942661120: 'false',\n",
       " 550038665653542913: 'false',\n",
       " 519348106836860929: 'true',\n",
       " 387007120689737729: 'false',\n",
       " 525327108520673281: 'unverified',\n",
       " 450341615979069440: 'false',\n",
       " 387281927427727360: 'false',\n",
       " 553503184174710784: 'unverified',\n",
       " 524312881823637504: 'true',\n",
       " 510906049549705216: 'true',\n",
       " 550131732805328896: 'unverified',\n",
       " 693122199536934912: 'non-rumor',\n",
       " 524185876931162112: 'true',\n",
       " 532284428542681088: 'false',\n",
       " 545271117355958272: 'true',\n",
       " 516545908813475841: 'unverified',\n",
       " 407254590363426816: 'true',\n",
       " 524941504796962816: 'true',\n",
       " 549927969032916993: 'unverified',\n",
       " 692046121250586624: 'non-rumor',\n",
       " 689650617464311813: 'non-rumor',\n",
       " 435998882250899456: 'false',\n",
       " 544523550519218176: 'unverified',\n",
       " 714531423273746432: 'unverified',\n",
       " 693595814401327104: 'non-rumor',\n",
       " 514451906211893248: 'true',\n",
       " 515519742355185664: 'true',\n",
       " 767710245779103744: 'non-rumor',\n",
       " 519929168864497664: 'false',\n",
       " 651809229842608128: 'unverified',\n",
       " 500377145349521411: 'unverified',\n",
       " 675434959939756032: 'non-rumor',\n",
       " 489833932813516800: 'false',\n",
       " 548672230134382592: 'false',\n",
       " 665317131597291520: 'non-rumor',\n",
       " 693711272014155776: 'non-rumor',\n",
       " 500327120770301952: 'unverified',\n",
       " 693821069879160832: 'non-rumor',\n",
       " 532281981795848192: 'false',\n",
       " 767528171520524288: 'non-rumor',\n",
       " 496698590153416704: 'false',\n",
       " 690137637810761728: 'non-rumor',\n",
       " 546028163134808064: 'unverified',\n",
       " 522815405451407360: 'unverified',\n",
       " 531531382611513344: 'unverified',\n",
       " 376874273601630208: 'false',\n",
       " 500319675797209088: 'unverified',\n",
       " 517487705978966016: 'true',\n",
       " 521770744855134208: 'unverified',\n",
       " 553575232867672064: 'unverified',\n",
       " 501881262240694272: 'true',\n",
       " 525039784503541760: 'true',\n",
       " 759653274605740032: 'non-rumor',\n",
       " 553169307824177152: 'false',\n",
       " 572002458647412736: 'false',\n",
       " 530017051025604609: 'false',\n",
       " 693102023764492288: 'non-rumor',\n",
       " 692707593991643136: 'non-rumor',\n",
       " 693934062445187072: 'non-rumor',\n",
       " 571052823665704960: 'false',\n",
       " 543436472842334209: 'unverified',\n",
       " 689529955294777344: 'non-rumor',\n",
       " 691299810553384960: 'non-rumor',\n",
       " 528237933661544448: 'true',\n",
       " 674713782778654721: 'non-rumor',\n",
       " 500377906305327104: 'unverified',\n",
       " 650952376954650629: 'unverified',\n",
       " 407213757048160256: 'true',\n",
       " 428366153149194240: 'false',\n",
       " 570405783306944514: 'false',\n",
       " 522453569702989824: 'unverified',\n",
       " 580325090367315968: 'unverified',\n",
       " 689799953330368513: 'non-rumor',\n",
       " 524930671220105216: 'true',\n",
       " 407197672702214144: 'true',\n",
       " 693490132440129536: 'non-rumor',\n",
       " 366025766183514112: 'false',\n",
       " 767300637012951040: 'non-rumor',\n",
       " 686527768327274497: 'non-rumor',\n",
       " 764158239089029120: 'non-rumor',\n",
       " 514098803347951616: 'true',\n",
       " 522684367647559681: 'unverified',\n",
       " 765631087867367426: 'non-rumor',\n",
       " 489974591985840128: 'false',\n",
       " 536105045545472000: 'true',\n",
       " 509152237503778816: 'false',\n",
       " 407182466613792768: 'true',\n",
       " 687632433483759616: 'non-rumor',\n",
       " 407181813174788096: 'true',\n",
       " 536962520477470720: 'true',\n",
       " 554655549896159233: 'false',\n",
       " 436157173161144321: 'false',\n",
       " 550175324634619904: 'unverified',\n",
       " 551120768164462592: 'true',\n",
       " 524965786059026432: 'true',\n",
       " 510908595144519680: 'true',\n",
       " 568580363339091968: 'false',\n",
       " 763738618573623297: 'unverified',\n",
       " 531498841158082562: 'unverified',\n",
       " 757367391202471937: 'unverified',\n",
       " 692357775859564544: 'non-rumor',\n",
       " 524928119955013632: 'true',\n",
       " 500413818368184321: 'unverified',\n",
       " 554722567224049664: 'true',\n",
       " 756282236375277568: 'unverified',\n",
       " 670380354822348801: 'non-rumor',\n",
       " 692134022839955456: 'non-rumor',\n",
       " 517051126466625536: 'true',\n",
       " 553538058440941568: 'unverified',\n",
       " 527308173112139776: 'true',\n",
       " 524958128392376320: 'true',\n",
       " 407217471473451008: 'true',\n",
       " 550003619303788545: 'unverified',\n",
       " 407176055657484289: 'true',\n",
       " 521727831307722753: 'false',\n",
       " 516727880571572225: 'unverified',\n",
       " 522321383716818944: 'true',\n",
       " 516423807394131968: 'unverified',\n",
       " 693605375803813889: 'non-rumor',\n",
       " 551106923601219585: 'true',\n",
       " 489854703321509888: 'false',\n",
       " 716461257025581056: 'unverified',\n",
       " 553099685888790528: 'false',\n",
       " 407279190913069056: 'true',\n",
       " 693524179597955072: 'non-rumor',\n",
       " 427846006600777729: 'true',\n",
       " 524977992683237376: 'true',\n",
       " 523535962962280448: 'false',\n",
       " 351767344097398785: 'false',\n",
       " 326137285450018817: 'false',\n",
       " 745236050407194624: 'unverified',\n",
       " 692581635594694658: 'non-rumor',\n",
       " 455525437351800832: 'false',\n",
       " 514512849876836352: 'false',\n",
       " 687631811560763392: 'non-rumor',\n",
       " 767068504277344260: 'non-rumor',\n",
       " 519965814175567872: 'false',\n",
       " 688096910217879554: 'non-rumor',\n",
       " 767355611277168642: 'non-rumor',\n",
       " 532259028152246272: 'false',\n",
       " 514109888138842113: 'true',\n",
       " 729647367457230850: 'unverified',\n",
       " 726442550266044416: 'unverified',\n",
       " 516345893139001346: 'unverified',\n",
       " 553501357156876290: 'unverified',\n",
       " 690670464440504320: 'non-rumor',\n",
       " 692816955837804544: 'non-rumor',\n",
       " 538010064591028224: 'unverified',\n",
       " 535282587325833216: 'unverified',\n",
       " 514098833253748736: 'true',\n",
       " 552498516225179650: 'true',\n",
       " 727966590084485120: 'unverified',\n",
       " 489798752438583297: 'false',\n",
       " 514039286845698048: 'true',\n",
       " 509464217549021184: 'true',\n",
       " 489794696013967360: 'false',\n",
       " 547509900130021377: 'false',\n",
       " 715671763808493569: 'unverified',\n",
       " 693899062098137088: 'non-rumor',\n",
       " 553518472798683136: 'unverified',\n",
       " 689922870689009664: 'non-rumor',\n",
       " 691632933170450435: 'non-rumor',\n",
       " 532197572698320896: 'true',\n",
       " 504478567062056962: 'false',\n",
       " 516856061182345216: 'false',\n",
       " 656361703664451585: 'unverified',\n",
       " 539254751343681536: 'false',\n",
       " 714560810266132480: 'unverified',\n",
       " 501785003144273921: 'false',\n",
       " 689833530197671936: 'non-rumor',\n",
       " 489902412900343809: 'false',\n",
       " 491200302365040640: 'false',\n",
       " 534205316620382208: 'true',\n",
       " 526459255147491328: 'false',\n",
       " 691976624665919488: 'non-rumor',\n",
       " 552457656666185728: 'true',\n",
       " 722885778448121857: 'unverified',\n",
       " 525041127343550464: 'true',\n",
       " 706933939953344514: 'unverified',\n",
       " 503609826128044032: 'unverified',\n",
       " 544698560655400961: 'false',\n",
       " 500290456845299714: 'unverified',\n",
       " 714755546285477888: 'unverified',\n",
       " 532255606623989760: 'false',\n",
       " 693118410444636161: 'non-rumor',\n",
       " 509077805296914432: 'false',\n",
       " 553164985460068352: 'unverified',\n",
       " 534763214106820608: 'false',\n",
       " 407184707118309376: 'true',\n",
       " 531543620856197121: 'unverified',\n",
       " 524961721744900097: 'true',\n",
       " 531193369465470976: 'unverified',\n",
       " 377519445578895360: 'false',\n",
       " 693722259740069889: 'non-rumor',\n",
       " 763428684850094080: 'unverified',\n",
       " 688757736310484994: 'non-rumor',\n",
       " 538408710084001793: 'true',\n",
       " 752875379765968897: 'unverified',\n",
       " 742114513726623744: 'unverified',\n",
       " 691064699853606913: 'non-rumor',\n",
       " 716457799392342018: 'unverified',\n",
       " 499366666300846081: 'unverified',\n",
       " 778625026144792577: 'unverified',\n",
       " 538218607822770176: 'true',\n",
       " 715254040289021952: 'unverified',\n",
       " 724624672604610562: 'unverified',\n",
       " 514081464171102208: 'false',\n",
       " 524930365400821761: 'true',\n",
       " 91728807081426944: 'false',\n",
       " 767668607803359234: 'non-rumor',\n",
       " 655432919595548672: 'unverified',\n",
       " 536827853669941248: 'true',\n",
       " 767823175480647681: 'non-rumor',\n",
       " 742571519105077248: 'unverified',\n",
       " 693795905049329664: 'non-rumor',\n",
       " 520630231531802624: 'false',\n",
       " 535315394127728641: 'false',\n",
       " 535148463609356288: 'unverified',\n",
       " 524699123971534848: 'true',\n",
       " 407197113286545409: 'true',\n",
       " 507263441359613953: 'true',\n",
       " 436215971272212480: 'false',\n",
       " 692440373449396224: 'non-rumor',\n",
       " 568571403756294144: 'false',\n",
       " 692451275930234880: 'non-rumor',\n",
       " 766299560415223808: 'non-rumor',\n",
       " 514468018970034176: 'false',\n",
       " 693509689418588160: 'non-rumor',\n",
       " 537976883150077952: 'unverified',\n",
       " 521777301470871553: 'unverified',\n",
       " 689100941119848449: 'non-rumor',\n",
       " 518192429153783808: 'false',\n",
       " 514478918636535808: 'true',\n",
       " 707786906189303808: 'unverified',\n",
       " 691380348379201536: 'non-rumor',\n",
       " 693091255685787648: 'non-rumor',\n",
       " 551159366876147714: 'true',\n",
       " 524929106987991040: 'true',\n",
       " 524961908903149568: 'true',\n",
       " 764653644138450945: 'non-rumor',\n",
       " 523026678419714048: 'true',\n",
       " 436146437530075136: 'true',\n",
       " 693830499047989248: 'non-rumor',\n",
       " 407248574099894272: 'true',\n",
       " 560442390805090307: 'false',\n",
       " 763524712853102596: 'unverified',\n",
       " 554979642210549760: 'false',\n",
       " 490080544949276672: 'false',\n",
       " 537355288295518210: 'false',\n",
       " 553212962044149761: 'unverified',\n",
       " 554597409351671808: 'false',\n",
       " 693785810311778304: 'non-rumor',\n",
       " 733242244522725376: 'unverified',\n",
       " 509466295344304129: 'true',\n",
       " 523126268603031552: 'true',\n",
       " 523555724807262208: 'false',\n",
       " 532276693931945984: 'true',\n",
       " 692746621218656258: 'non-rumor',\n",
       " 714555825122107392: 'unverified',\n",
       " 407164597511393280: 'true',\n",
       " 519962130859429888: 'false',\n",
       " 778681502825451520: 'unverified',\n",
       " 528822281972498432: 'true',\n",
       " 407188779255222273: 'true',\n",
       " 693600369281253376: 'non-rumor',\n",
       " 518620083342835712: 'false',\n",
       " 532928419051622400: 'unverified',\n",
       " 740791134146965504: 'unverified',\n",
       " 692105324111761408: 'non-rumor',\n",
       " 521098692326350848: 'false',\n",
       " 427924694663454720: 'true',\n",
       " 528389559441444864: 'false',\n",
       " 536547921542791170: 'true',\n",
       " 525024066152169473: 'true',\n",
       " 692759090469212160: 'non-rumor',\n",
       " 500280838710247424: 'unverified',\n",
       " 693465666259656705: 'non-rumor',\n",
       " 504387128013246465: 'true',\n",
       " 500295393301647360: 'unverified',\n",
       " 524586367456919552: 'true',\n",
       " 690145844318834689: 'non-rumor',\n",
       " 500286058664579072: 'unverified',\n",
       " 518981379963777025: 'false',\n",
       " 499456140044824576: 'unverified',\n",
       " 553008832784642048: 'false',\n",
       " 489794369487380480: 'false',\n",
       " 544333764814323713: 'unverified',\n",
       " 523558773345255424: 'false',\n",
       " 767520433247809536: 'non-rumor',\n",
       " 555072815154475008: 'false',\n",
       " 761027420849745920: 'non-rumor',\n",
       " 531648145613398016: 'true',\n",
       " 693411834808946688: 'non-rumor',\n",
       " 693032076543655936: 'non-rumor',\n",
       " 687734662249246724: 'non-rumor',\n",
       " 531526274003771392: 'unverified',\n",
       " 407236151167025152: 'true',\n",
       " 655812191233417216: 'unverified',\n",
       " 538137715293061120: 'unverified',\n",
       " 538847758032310272: 'false',\n",
       " 531131094276382720: 'unverified',\n",
       " 544154048950435840: 'false',\n",
       " 727854332188577792: 'unverified',\n",
       " 742012307694223361: 'unverified',\n",
       " 707332312724283392: 'unverified',\n",
       " 387009577763360768: 'false',\n",
       " 407192370040803329: 'true',\n",
       " 516386101766782976: 'unverified',\n",
       " 407177474788057088: 'true',\n",
       " 523123023646175233: 'false',\n",
       " 755447443009916929: 'unverified',\n",
       " 761675705956655109: 'non-rumor',\n",
       " 348209998565879810: 'false',\n",
       " 489887806018183168: 'false',\n",
       " 489841070818488320: 'false',\n",
       " 364589696573124609: 'false',\n",
       " 688763993633107968: 'non-rumor',\n",
       " 693272659447119873: 'non-rumor',\n",
       " 524168442841010176: 'false',\n",
       " 705092738224525312: 'unverified',\n",
       " 509084603617792001: 'false',\n",
       " 693533469922693120: 'non-rumor',\n",
       " 510918747188514816: 'true',\n",
       " 525301290218553344: 'true',\n",
       " 500394061887709184: 'unverified',\n",
       " 724661834419048448: 'unverified',\n",
       " 757190314880884736: 'unverified',\n",
       " 692780496590254081: 'non-rumor',\n",
       " 761381302260031489: 'non-rumor',\n",
       " 644321231618076673: 'non-rumor',\n",
       " 775672628493357057: 'unverified',\n",
       " 693240976970752004: 'non-rumor',\n",
       " 551575491430211584: 'true',\n",
       " 380140114552188928: 'false',\n",
       " 273182568298450945: 'false',\n",
       " 80084555733803009: 'false',\n",
       " 522534012544638976: 'unverified',\n",
       " 725983128444129280: 'unverified',\n",
       " 500327106824245249: 'unverified',\n",
       " 692745208421679104: 'non-rumor',\n",
       " 489798752358916096: 'false',\n",
       " 427831952826449920: 'true',\n",
       " 552821069036670976: 'unverified',\n",
       " 527138218505175040: 'true',\n",
       " 766808183696351233: 'unverified',\n",
       " 538729383662338049: 'false',\n",
       " 514540926640087040: 'false',\n",
       " 723511860516016128: 'unverified',\n",
       " 684118654418419712: 'non-rumor',\n",
       " 534543768653807618: 'false',\n",
       " 516705572373397504: 'unverified',\n",
       " 693718533239365633: 'non-rumor',\n",
       " 691986377584549888: 'non-rumor',\n",
       " 524152657783820288: 'false',\n",
       " 501934077612941312: 'unverified',\n",
       " 525033697045925888: 'true',\n",
       " 407169885345943552: 'true',\n",
       " 489800427152879616: 'false',\n",
       " 523888836456108032: 'true',\n",
       " 427690961280696320: 'true',\n",
       " 521769631519092736: 'unverified',\n",
       " 692727606718824448: 'non-rumor',\n",
       " 650975967146602496: 'unverified',\n",
       " 549599086073368576: 'false',\n",
       " 693185488644149248: 'non-rumor',\n",
       " 372914976102834176: 'false',\n",
       " 534303066867367937: 'false',\n",
       " 688110059553861634: 'non-rumor',\n",
       " 523123779124600833: 'false',\n",
       " 525116411887513602: 'true',\n",
       " 407211258304552960: 'true',\n",
       " 557885928417816576: 'false',\n",
       " 524991240270807042: 'true',\n",
       " 693549273468764160: 'non-rumor',\n",
       " 370855596050108416: 'false',\n",
       " 436001434212249600: 'false',\n",
       " 689208632148365314: 'non-rumor',\n",
       " 524958992330522624: 'true',\n",
       " 693068611951300608: 'non-rumor',\n",
       " 531659884430106624: 'true',\n",
       " 509463933564891137: 'true',\n",
       " 524926472432410625: 'unverified',\n",
       " 693643778033344512: 'non-rumor',\n",
       " 690296550803636224: 'non-rumor',\n",
       " 500363740311982081: 'unverified',\n",
       " 658755852199927808: 'unverified',\n",
       " 748543642323783681: 'unverified',\n",
       " 524227209976745984: 'false',\n",
       " 732004388181434368: 'unverified',\n",
       " 684878165714563072: 'non-rumor',\n",
       " 532275383430041600: 'true',\n",
       " 513749585391390720: 'true',\n",
       " 534409916560461825: 'false',\n",
       " 538193964479045633: 'unverified',\n",
       " 693234925252775936: 'non-rumor',\n",
       " 553136604882014208: 'false',\n",
       " 687786822475366401: 'non-rumor',\n",
       " 525009850104037376: 'true',\n",
       " 436195672677953536: 'false',\n",
       " 521772674155286528: 'unverified',\n",
       " 727187859367546880: 'unverified',\n",
       " 348203818741272576: 'true',\n",
       " 522654603675176960: 'unverified',\n",
       " 692098463262216192: 'non-rumor',\n",
       " 524962676665888769: 'true',\n",
       " 505657661120348163: 'false',\n",
       " 386998038004113409: 'false',\n",
       " 683731779342188545: 'non-rumor',\n",
       " 527947775905103872: 'true',\n",
       " 692374052082708480: 'non-rumor',\n",
       " 747275598347837440: 'unverified',\n",
       " 532231906364911616: 'true',\n",
       " 568075025418317826: 'true',\n",
       " 774833492865593344: 'unverified',\n",
       " 760928376668454912: 'unverified',\n",
       " 728013148788154368: 'unverified',\n",
       " 532297096356167680: 'true',\n",
       " 511918957255991296: 'false',\n",
       " 489975398151618560: 'false',\n",
       " 764573523713011713: 'non-rumor',\n",
       " 693673606279229440: 'non-rumor',\n",
       " 517053056211357696: 'true',\n",
       " 538760398003253248: 'false',\n",
       " 691656866972209152: 'non-rumor',\n",
       " 552661209204260864: 'true',\n",
       " 521821037726797824: 'false',\n",
       " 525040767317082113: 'true',\n",
       " 723644048867774464: 'unverified',\n",
       " 525300042207612928: 'true',\n",
       " 524948703850029056: 'true',\n",
       " 742050150307246080: 'unverified',\n",
       " 427662266993889280: 'true',\n",
       " 521835345059389440: 'false',\n",
       " 520244836084228096: 'false',\n",
       " 516434071631974400: 'unverified',\n",
       " 690431187916099584: 'non-rumor',\n",
       " 560207034545889281: 'false',\n",
       " 407169699559247872: 'true',\n",
       " 407233172380733441: 'true',\n",
       " 527295015643394048: 'true',\n",
       " 536826622725218304: 'true',\n",
       " 749286768554438658: 'unverified',\n",
       " 514067907475959808: 'false',\n",
       " 534337548680978432: 'false',\n",
       " 377002232257855488: 'false',\n",
       " 563123548857053184: 'false',\n",
       " 524925033626738688: 'true',\n",
       " 524685073979678721: 'true',\n",
       " 524976526891417600: 'true',\n",
       " 489836271049928704: 'false',\n",
       " 687674548712505346: 'non-rumor',\n",
       " 407164247768977409: 'true',\n",
       " 531676808836833280: 'true',\n",
       " 686493272806576128: 'non-rumor',\n",
       " 407198255793987584: 'true',\n",
       " 525305690257379328: 'true',\n",
       " 524178388282802176: 'false',\n",
       " 538133250066366464: 'unverified',\n",
       " 514148279601606656: 'true',\n",
       " 693537938425171968: 'non-rumor',\n",
       " 693284114724700160: 'non-rumor',\n",
       " 380055172896272384: 'false',\n",
       " 436074888638763008: 'false',\n",
       " 553040930144792576: 'false',\n",
       " 727179214546456577: 'unverified',\n",
       " 692566765822435328: 'non-rumor',\n",
       " 693822949456007169: 'non-rumor',\n",
       " 539052152602107904: 'false',\n",
       " 693217698482933760: 'non-rumor',\n",
       " 699252269414354944: 'non-rumor',\n",
       " 536808074431184896: 'true',\n",
       " 690954487322796032: 'non-rumor',\n",
       " 516371936604352513: 'unverified',\n",
       " 523566838958284801: 'false',\n",
       " 651486105628463105: 'unverified',\n",
       " 525411618231169025: 'true',\n",
       " 692745861978263552: 'non-rumor',\n",
       " 524181699043663872: 'false',\n",
       " 692692529779707904: 'non-rumor',\n",
       " 524972443308683264: 'unverified',\n",
       " 686701278785843200: 'non-rumor',\n",
       " 520299904934371328: 'true',\n",
       " 524950507023245313: 'true',\n",
       " 376982539132366848: 'false',\n",
       " 524947149134774272: 'true',\n",
       " 516313006427602944: 'unverified',\n",
       " 528976109518721024: 'false',\n",
       " 407209131351629824: 'true',\n",
       " 531183173443801088: 'true',\n",
       " 621025025806577664: 'non-rumor',\n",
       " 693123067204243456: 'non-rumor',\n",
       " 524593547807178753: 'true',\n",
       " 692453106622275585: 'non-rumor',\n",
       " 685232704774787072: 'non-rumor',\n",
       " 519867098945044480: 'false',\n",
       " 685473149291479040: 'non-rumor',\n",
       " 514142113601437696: 'true',\n",
       " 676586804242309121: 'unverified',\n",
       " 676970570160988160: 'non-rumor',\n",
       " 504106473467944960: 'true',\n",
       " 509464271118688257: 'true',\n",
       " 407162181059612672: 'true',\n",
       " 515918632178577408: 'false',\n",
       " 524646141938647043: 'false',\n",
       " 525285199530455040: 'unverified',\n",
       " 767580827513397252: 'non-rumor',\n",
       " 501768276523761666: 'false',\n",
       " 691700082601758720: 'non-rumor',\n",
       " 566999951449001984: 'true',\n",
       " 726086935903494144: 'unverified',\n",
       " 407182194206322689: 'true',\n",
       " 665498651742085124: 'non-rumor',\n",
       " 495583434954514432: 'false',\n",
       " 501774615837163521: 'false',\n",
       " 407212665350942720: 'true',\n",
       " 513779458340569088: 'true',\n",
       " 693501482721681408: 'non-rumor',\n",
       " 707308274270539777: 'unverified',\n",
       " 500354773133299713: 'unverified',\n",
       " 543319210659971073: 'false',\n",
       " 549975297160802304: 'unverified',\n",
       " 692922211338981377: 'non-rumor',\n",
       " 516377014790782977: 'unverified',\n",
       " 537378640653475841: 'false',\n",
       " 490136764619948032: 'false',\n",
       " 553461741917863936: 'unverified',\n",
       " 687956181646753792: 'non-rumor',\n",
       " 689555819059351552: 'non-rumor',\n",
       " 716424773216022530: 'unverified',\n",
       " 507454543169617920: 'false',\n",
       " 445495350171013120: 'true',\n",
       " 689549346875031552: 'non-rumor',\n",
       " 762775964224847872: 'non-rumor',\n",
       " 717081129627553792: 'unverified',\n",
       " 513015596879867904: 'false',\n",
       " 489796088338341888: 'false',\n",
       " 518870005677826050: 'true',\n",
       " 692405195796529152: 'non-rumor',\n",
       " 652349108653551616: 'unverified',\n",
       " 407191058108256257: 'true',\n",
       " 767355933919997952: 'non-rumor',\n",
       " 692840850141753347: 'non-rumor',\n",
       " 356268980211687424: 'false',\n",
       " 407174474207731712: 'true',\n",
       " 407173794583695360: 'true',\n",
       " 407267881131732992: 'true',\n",
       " 767462297270968321: 'non-rumor',\n",
       " 538412662858588160: 'false',\n",
       " 427666815842414592: 'true',\n",
       " 531085761521528834: 'true',\n",
       " 693456387880402944: 'non-rumor',\n",
       " 563117355254161408: 'true',\n",
       " 503610065769598976: 'unverified',\n",
       " 566089610485579776: 'true',\n",
       " 766299795833102336: 'non-rumor',\n",
       " 693618217667928068: 'non-rumor',\n",
       " 514429381737852929: 'false',\n",
       " 552810448324943872: 'unverified',\n",
       " 562313802369073153: 'true',\n",
       " 535579674152148992: 'true',\n",
       " 716092408920936448: 'unverified',\n",
       " 524932056560963584: 'unverified',\n",
       " 727116900983934976: 'unverified',\n",
       " 692336523090509824: 'non-rumor',\n",
       " 524206851450486785: 'false',\n",
       " 407229989902225408: 'true',\n",
       " 526757033186697216: 'unverified',\n",
       " 692537942439501824: 'non-rumor',\n",
       " 524952094986350592: 'true',\n",
       " 514201362729828352: 'true',\n",
       " 682629960826224646: 'non-rumor',\n",
       " 515717938025078784: 'true',\n",
       " 658938136299511808: 'unverified',\n",
       " 692217618778972161: 'non-rumor',\n",
       " 692382471061663744: 'non-rumor',\n",
       " 538129041539092480: 'false',\n",
       " 407205009676587009: 'true',\n",
       " 504243239084236800: 'true',\n",
       " 553115236992290816: 'false',\n",
       " 693408117225299968: 'non-rumor',\n",
       " 522443333831262208: 'unverified',\n",
       " 529857740059860992: 'false',\n",
       " 524935246647926784: 'true',\n",
       " 688281442162352128: 'non-rumor',\n",
       " 691327238353129472: 'non-rumor',\n",
       " 407237676350185475: 'true',\n",
       " 524941041301225472: 'true',\n",
       " 690985215691354112: 'non-rumor',\n",
       " 691564354567798784: 'non-rumor',\n",
       " 544514570367168512: 'unverified',\n",
       " 692287441168900097: 'non-rumor',\n",
       " 524291188422356993: 'true',\n",
       " 549998575330275330: 'unverified',\n",
       " 687639526282498049: 'non-rumor',\n",
       " 536871550184869888: 'true',\n",
       " 500347114975944705: 'unverified',\n",
       " 544939287222165506: 'unverified',\n",
       " 693807822971236353: 'non-rumor',\n",
       " 507242819623587841: 'true',\n",
       " 693560600471863296: 'non-rumor',\n",
       " 529434230938284032: 'true',\n",
       " 551097831851687938: 'true',\n",
       " 652300118205427716: 'unverified',\n",
       " 747443219487678464: 'unverified',\n",
       " 407253554039386112: 'true',\n",
       " 553474188259102720: 'unverified',\n",
       " 755548076438196225: 'unverified',\n",
       " 691351064868732928: 'non-rumor',\n",
       " 524976106861236226: 'true',\n",
       " 517416417164263424: 'true',\n",
       " 372901260183494656: 'false',\n",
       " 767482587879051265: 'non-rumor',\n",
       " 528341588867416064: 'false',\n",
       " 524991067675189250: 'true',\n",
       " 550279538278543360: 'unverified',\n",
       " 767845555313774593: 'non-rumor',\n",
       " 514508854990999553: 'false',\n",
       " 524979548195024898: 'true',\n",
       " 767094580634202112: 'non-rumor',\n",
       " 692391757775028228: 'non-rumor',\n",
       " 524959836778536961: 'true',\n",
       " 693198912660885505: 'non-rumor',\n",
       " 766316494695305218: 'non-rumor',\n",
       " 728038172270198787: 'unverified',\n",
       " 552706985087361025: 'true',\n",
       " 693162950358622208: 'non-rumor',\n",
       " 500298752469770240: 'unverified',\n",
       " 724348906096590849: 'unverified',\n",
       " 691027026552229888: 'non-rumor',\n",
       " 510921017972514817: 'true',\n",
       " 689236842454106113: 'non-rumor',\n",
       " 728207861050970112: 'unverified',\n",
       " 693629063932596224: 'non-rumor',\n",
       " 576319832800555008: 'unverified',\n",
       " 407193347279699968: 'true',\n",
       " 693921710383337472: 'non-rumor',\n",
       " 692800345274241025: 'non-rumor',\n",
       " 690252631822864384: 'non-rumor',\n",
       " 693443289370882049: 'non-rumor',\n",
       " 552615631246622721: 'true',\n",
       " 407187197427593217: 'true',\n",
       " 528607634099015680: 'true',\n",
       " 525032872647065600: 'unverified',\n",
       " 536835346932441088: 'true',\n",
       " 505611045897924608: 'false',\n",
       " 544287209730236416: 'unverified',\n",
       " 551128385850601472: 'true',\n",
       " 521496155310796800: 'false',\n",
       " 532851585408593920: 'false',\n",
       " 692127290998784001: 'non-rumor',\n",
       " 531300385248800768: 'unverified',\n",
       " 516358319317852160: 'unverified',\n",
       " 692792765852286977: 'non-rumor',\n",
       " 560163341524807680: 'false',\n",
       " 504131150429061121: 'true',\n",
       " 500270780832174080: 'unverified',\n",
       " 663744139666804736: 'unverified',\n",
       " 554886875303780352: 'false',\n",
       " 686421576904916992: 'non-rumor',\n",
       " 568905632998412288: 'false',\n",
       " 529088963420909569: 'false',\n",
       " 513922322915282945: 'false',\n",
       " 531524882325311488: 'unverified',\n",
       " 534278078105944064: 'false',\n",
       " 767170849472405508: 'non-rumor',\n",
       " 727623131494387714: 'unverified',\n",
       " 500341884678836224: 'unverified',\n",
       " 524923293711998976: 'true',\n",
       " 357300409292959744: 'false',\n",
       " 752965545528528898: 'unverified',\n",
       " 778572032531427332: 'unverified',\n",
       " 716451800581279744: 'unverified',\n",
       " 542860306603868160: 'false',\n",
       " 701514249269542912: 'unverified',\n",
       " 692031514943582208: 'non-rumor',\n",
       " 407170443443859456: 'true',\n",
       " 519131101798100992: 'true',\n",
       " 550095745161134080: 'unverified',\n",
       " 520265722967375872: 'unverified',\n",
       " 692056985513123840: 'non-rumor',\n",
       " 524317344730869760: 'false',\n",
       " 542600886653292545: 'false',\n",
       " 522692223952228353: 'unverified',\n",
       " 531129816389779456: 'true',\n",
       " 727172374999666688: 'unverified',\n",
       " 723772395211862016: 'unverified',\n",
       " 519530210795216896: 'true',\n",
       " 490141776984825856: 'false',\n",
       " 528312406498635777: 'true',\n",
       " 562387516964491265: 'true',\n",
       " 499679379820412928: 'true',\n",
       " 693662456393134080: 'non-rumor',\n",
       " 525005886272843776: 'true',\n",
       " 537377960471166976: 'false',\n",
       " 356407105953071107: 'false',\n",
       " 692024256520179712: 'non-rumor',\n",
       " 693729678306189313: 'non-rumor',\n",
       " 365056836476481537: 'false',\n",
       " 524211911668543488: 'false',\n",
       " 407192027714322432: 'true',\n",
       " 692472216135569409: 'non-rumor',\n",
       " 693853982293254144: 'non-rumor',\n",
       " 387006553124896768: 'false',\n",
       " 688441130271289350: 'non-rumor',\n",
       " 568452856224747520: 'false',\n",
       " 532239131540467712: 'false',\n",
       " 407232661107642368: 'true',\n",
       " 693140568143220736: 'non-rumor',\n",
       " 504771233557147648: 'false',\n",
       " 532007252677656576: 'false',\n",
       " 521790189715615744: 'false',\n",
       " 740748123581087745: 'unverified',\n",
       " 519375764165509121: 'false',\n",
       " 506118402466152449: 'false',\n",
       " 568568737621630977: 'false',\n",
       " 524263570985340928: 'true',\n",
       " 387309869080281088: 'false',\n",
       " 539115704285270017: 'false',\n",
       " 690174644071047168: 'non-rumor',\n",
       " 661184793342775296: 'non-rumor',\n",
       " 693492349142433792: 'non-rumor',\n",
       " 693136527816331264: 'non-rumor',\n",
       " 692510002586714113: 'non-rumor',\n",
       " 764473554431725568: 'non-rumor',\n",
       " 549124560793374720: 'false',\n",
       " 516539137646542848: 'unverified',\n",
       " 519935535083565056: 'false',\n",
       " 524949828711620608: 'true',\n",
       " 427652211414274048: 'true',\n",
       " 504109775358287872: 'true',\n",
       " 489970164469751808: 'false',\n",
       " 766789709045518336: 'unverified',\n",
       " 652982112870662144: 'unverified',\n",
       " 532246457609244672: 'false',\n",
       " 553190393370923008: 'false',\n",
       " 387010299536355328: 'false',\n",
       " 690920430287331328: 'non-rumor',\n",
       " 504267796424163328: 'true',\n",
       " 510931521306243072: 'true',\n",
       " 703234354579898368: 'unverified',\n",
       " 524144369235140608: 'false',\n",
       " 524998384357486592: 'true',\n",
       " 692267960686284800: 'non-rumor',\n",
       " 366205297557110784: 'false',\n",
       " 655815788675399680: 'unverified',\n",
       " 407258075502899200: 'true',\n",
       " 537484793882361856: 'unverified',\n",
       " 691791477912014850: 'non-rumor',\n",
       " 726190016435728385: 'unverified',\n",
       " 764620072228970496: 'non-rumor',\n",
       " 500391222075076610: 'unverified',\n",
       " 690676539323740161: 'non-rumor',\n",
       " 517034091028676608: 'unverified',\n",
       " 509478961705807872: 'true',\n",
       " 528357165677817856: 'true',\n",
       " 692031224227958789: 'non-rumor',\n",
       " 407217865973329920: 'true',\n",
       " 693792605755285504: 'non-rumor',\n",
       " 560414522758799361: 'false',\n",
       " 692709993573789701: 'non-rumor',\n",
       " 748640007934590976: 'unverified',\n",
       " 724320681517670400: 'unverified',\n",
       " 518924377791164420: 'true',\n",
       " 372489152903643136: 'false',\n",
       " 500381163866062848: 'unverified',\n",
       " 516682897864916992: 'unverified',\n",
       " 693236688538243074: 'non-rumor',\n",
       " 551082055883563008: 'true',\n",
       " 525291306000846848: 'true',\n",
       " 524757333608394752: 'true',\n",
       " 693484841011060737: 'non-rumor',\n",
       " 365276206381268995: 'false',\n",
       " 701175292937707520: 'unverified',\n",
       " 693523133920342016: 'non-rumor',\n",
       " 515538937058971648: 'true',\n",
       " 524976980799000578: 'true',\n",
       " 489818014364086273: 'false',\n",
       " 693804630615166977: 'non-rumor',\n",
       " 693098795022725120: 'non-rumor',\n",
       " 560873850846846978: 'false',\n",
       " 689103487880310785: 'non-rumor',\n",
       " 688147918113538049: 'non-rumor',\n",
       " 678733118648524800: 'non-rumor',\n",
       " 387030572779847680: 'false',\n",
       " 407269894938718208: 'true',\n",
       " 407179055763836928: 'true',\n",
       " 692134721195786240: 'non-rumor',\n",
       " 693481256735150082: 'non-rumor',\n",
       " 552112474913136641: 'false',\n",
       " 528562608463704064: 'true',\n",
       " 524933380929245184: 'true',\n",
       " 537913349338435584: 'true',\n",
       " 436157398269849602: 'true',\n",
       " 693658242044542976: 'non-rumor',\n",
       " 687911282922631168: 'non-rumor',\n",
       " 527162170531151872: 'true',\n",
       " 500280422295937024: 'unverified',\n",
       " 524950264303075328: 'true',\n",
       " 693079616072880128: 'non-rumor',\n",
       " 407159686786732032: 'true',\n",
       " 524976963862429697: 'true',\n",
       " 495365942726381568: 'false',\n",
       " 692932696645865472: 'non-rumor',\n",
       " 692742353736568833: 'non-rumor',\n",
       " 336913656921083904: 'true',\n",
       " 529747368087126019: 'true',\n",
       " 528240834660298753: 'false',\n",
       " 387006700609228801: 'false',\n",
       " 525024181218725888: 'true',\n",
       " 524289058290159616: 'false',\n",
       " 470025166760980483: 'false',\n",
       " 295152287901417472: 'true',\n",
       " 724703995147751424: 'unverified',\n",
       " 387220814531395584: 'false',\n",
       " 501474962935648256: 'false',\n",
       " 693485676881403905: 'non-rumor',\n",
       " 509463916586762241: 'true',\n",
       " 692736398462300162: 'non-rumor',\n",
       " 688763099298529280: 'non-rumor',\n",
       " 557621135320178688: 'false',\n",
       " 553061846081896449: 'false',\n",
       " 693131575261659137: 'non-rumor',\n",
       " 723521076446142465: 'unverified',\n",
       " 757748522481491968: 'unverified',\n",
       " 407176873605865472: 'true',\n",
       " 716439952922312704: 'unverified',\n",
       " 489794593580650497: 'false',\n",
       " 532203605236154368: 'true',\n",
       " 516861311725342722: 'unverified',\n",
       " 692491478619262976: 'non-rumor',\n",
       " 692925163994796033: 'non-rumor',\n",
       " 500294803402137600: 'unverified',\n",
       " 567765185407234049: 'true',\n",
       " 387236601450864641: 'false',\n",
       " 514395350615203841: 'true',\n",
       " 692735354240114688: 'non-rumor',\n",
       " 553470492565602305: 'unverified',\n",
       " 544512910538838016: 'unverified',\n",
       " 523655026485366784: 'true',\n",
       " 693573042111270912: 'non-rumor',\n",
       " 407170170533064705: 'true',\n",
       " 516435293235183617: 'unverified',\n",
       " 527286846347567104: 'true',\n",
       " 682548344690970624: 'non-rumor',\n",
       " 387353560356118528: 'false',\n",
       " 489796998615547904: 'false',\n",
       " 743058135300988932: 'unverified',\n",
       " 535257207991205888: 'unverified',\n",
       " 693631772639145984: 'non-rumor',\n",
       " 692881863644282880: 'non-rumor',\n",
       " 524929796862918656: 'true',\n",
       " 778949749156245504: 'unverified',\n",
       " 715255507506892800: 'unverified',\n",
       " 693476901772513280: 'non-rumor',\n",
       " 516361247223058432: 'unverified',\n",
       " 407158558158254081: 'true',\n",
       " 514100169307348992: 'true',\n",
       " 523820806917603328: 'true',\n",
       " 648993731169939456: 'unverified',\n",
       " 517003426832453633: 'true',\n",
       " 690915657106526208: 'non-rumor',\n",
       " 509473920060104704: 'true',\n",
       " 504433135036407808: 'false',\n",
       " 525134787778850818: 'true',\n",
       " 651786568592658433: 'unverified',\n",
       " 549920761423863808: 'unverified',\n",
       " 518827403452637184: 'true',\n",
       " 640182854928961536: 'unverified',\n",
       " 499368931367608320: 'unverified',\n",
       " 727588444000526336: 'unverified',\n",
       " 506784541696991232: 'false',\n",
       " 407163869673443328: 'true',\n",
       " 519275971758026752: 'true',\n",
       " 693487871806689280: 'non-rumor',\n",
       " 357299879070023680: 'false',\n",
       " 491591245152935936: 'false',\n",
       " 506656271517622272: 'false',\n",
       " 514106939173634048: 'true',\n",
       " 701975210044497921: 'unverified',\n",
       " 693121477491757056: 'non-rumor',\n",
       " 507354546041925632: 'false',\n",
       " 692924229017309184: 'non-rumor',\n",
       " 688027085730897921: 'non-rumor',\n",
       " 514535408126795776: 'false',\n",
       " 742055437932040193: 'unverified',\n",
       " 692063305297453060: 'non-rumor',\n",
       " 693355140284334081: 'non-rumor',\n",
       " 562090534945431552: 'true',\n",
       " 524924619812511746: 'unverified',\n",
       " 524925124303396864: 'true',\n",
       " 365497016098369536: 'false',\n",
       " 689722724470620161: 'non-rumor',\n",
       " 697992796565741569: 'unverified',\n",
       " 553186555150749696: 'false',\n",
       " 766358933296517121: 'non-rumor',\n",
       " 489802446001434625: 'false',\n",
       " 356310469390245888: 'false',\n",
       " 427725166127226881: 'true',\n",
       " 356437941641416705: 'false',\n",
       " 516978780171436032: 'true',\n",
       " 551942365901250560: 'false',\n",
       " 760120409429643266: 'unverified',\n",
       " 489798219283857408: 'false',\n",
       " 651858133304782848: 'non-rumor',\n",
       " 766307327922335745: 'non-rumor',\n",
       " 522468118082633729: 'unverified',\n",
       " 500281094239817728: 'unverified',\n",
       " 500307001629745152: 'unverified',\n",
       " 364383457545162754: 'false',\n",
       " 387050178932654080: 'false',\n",
       " 765141361033109504: 'non-rumor',\n",
       " 407209222850756608: 'true',\n",
       " 767737100347191297: 'non-rumor',\n",
       " 553264892799488000: 'false',\n",
       " 531900034863091712: 'false',\n",
       " 757450526153900032: 'unverified',\n",
       " 525077806401994754: 'true',\n",
       " 407156621450571777: 'true',\n",
       " 387041762331471872: 'false',\n",
       " 691410231549530112: 'non-rumor',\n",
       " 507557659206516736: 'false',\n",
       " 514550436377137152: 'false',\n",
       " 692818857187213313: 'non-rumor',\n",
       " 728631482722308096: 'unverified',\n",
       " 544510450101415936: 'unverified',\n",
       " 767154835993063425: 'non-rumor',\n",
       " 511330322831507456: 'unverified',\n",
       " 764505291853627392: 'non-rumor',\n",
       " 693622130735484929: 'non-rumor',\n",
       " 547514662695469057: 'unverified',\n",
       " 524970097711267841: 'true',\n",
       " 780436430732525569: 'unverified',\n",
       " 519112613800972288: 'true',\n",
       " 407206094747209728: 'true',\n",
       " 692497796956561408: 'non-rumor',\n",
       " 688752484966363136: 'non-rumor',\n",
       " ...}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "twitter15_labels"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load Tweet Texts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "twitter15_text = {}\n",
    "def load_text(file):\n",
    "    f = open(file,'r')\n",
    "    labels = {}\n",
    "    \n",
    "    raw_data = f.readlines()\n",
    "    \n",
    "    for line in raw_data:\n",
    "        line = line.strip()\n",
    "        line = line.split('\\t')\n",
    "#         print(line[0],line[1])\n",
    "        labels[int(line[0])] = line[1]\n",
    "    \n",
    "    return labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "twitter15_text = load_text(twitter15_text_file)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Structure For Storing Tree Structures"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import print_function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Node:\n",
    "    def __init__(self,uid,tid,time_stamp):\n",
    "        self.children = {}\n",
    "        self.tid = tid\n",
    "        self.uid = uid\n",
    "        self.time_stamp = time_stamp\n",
    "        \n",
    "        self.branching = 0\n",
    "    \n",
    "    def add_child(self,node):\n",
    "        self.children[node.uid] = node\n",
    "        self.branching += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Tree:\n",
    "    def __init__(self,root):\n",
    "        self.root = root\n",
    "        self.tweet_id = root.tid\n",
    "        self.uid = root.uid\n",
    "        self.height = 0\n",
    "        self.nodes = 0\n",
    "    \n",
    "    def show(self):\n",
    "        queue = [self.root,0]\n",
    "        \n",
    "        while len(queue) != 0:\n",
    "            toprint = queue.pop(0)\n",
    "            if toprint == 0:\n",
    "                print('\\n')\n",
    "            else:\n",
    "                print(toprint.uid,end=' ')\n",
    "                queue += toprint.children.values()\n",
    "                queue.append(0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Get User Information from API"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import tweepy\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle as pkl\n",
    "from collections import defaultdict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "consumer_key=\"LydBaZlcnj64wAhz2zqYSSDG8\"\n",
    "consumer_secret=\"lNDf9f9xcz3qjbXVxcKaF65t8vEvwpDlRQbljwLWS6NOst0rWb\"\n",
    "access_key=\"747987764579176448-TmAC6q6EYzK2czKk2wpwSBCycYFjLpd\"\n",
    "access_secret=\"PHaIgoAFNZVdocFVepu3MwrMauzWlDSrTjVBUSrcpUTcD\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "auth = tweepy.OAuthHandler(consumer_key, consumer_secret)\n",
    "auth.set_access_token(access_key, access_secret)\n",
    "api = tweepy.API(auth)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_userinfo(uid):\n",
    "    user_vect = []\n",
    "    user = api.get_user(uid)\n",
    "    if user.friends_count:\n",
    "        ratio = user.followers_count / user.friends_count\n",
    "    else:\n",
    "        ratio = 100000*user.followers_count\n",
    "    user_vect.append((user.followers_count,user.friends_count,ratio,user.statuses_count,user.created_at,user.verified))\n",
    "    return user, user_vect"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "user_count = defaultdict(lambda: 0)\n",
    "user_data = {}\n",
    "user_features = {}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load Trees"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "from os import listdir\n",
    "from os.path import isfile, join\n",
    "import re\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "datapath = '../twitter15/tree/'\n",
    "treefiles = [f for f in listdir(datapath) if isfile(join(datapath, f))]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "def insertnode(tree,curnode,parent,child):\n",
    "#     print(curnode.uid,parent.uid,child.uid)\n",
    "    if curnode.uid == parent.uid:\n",
    "        curnode.add_child(child)\n",
    "        return 1\n",
    "    \n",
    "    elif parent.uid in curnode.children:\n",
    "        s = insertnode(tree,curnode.children[parent.uid],parent,child)\n",
    "        return 2\n",
    "    else:\n",
    "        for node in curnode.children:\n",
    "            s = insertnode(tree,curnode.children[node],parent,child)\n",
    "            if s == 2:\n",
    "                break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def file2tree(datapath,txtfile):\n",
    "    f = open(datapath+txtfile,'r')\n",
    "    rawtree = f.readlines()\n",
    "    tree = None\n",
    "    count = 0\n",
    "    nodes = {}\n",
    "    root = -1\n",
    "    \n",
    "    for line in rawtree:\n",
    "        count += 1\n",
    "        node1 = None\n",
    "        \n",
    "        line = line.strip()\n",
    "        edge = re.split('\\]->\\[',line)\n",
    "        edge[0] = edge[0][1:].split(', ')\n",
    "        edge[1] = edge[1][:-1].split(', ')\n",
    "        \n",
    "        edge[0] = [x[1:-1] for x in edge[0]]\n",
    "        edge[1] = [x[1:-1] for x in edge[1]]\n",
    "        \n",
    "        edge[1][0] = int(edge[1][0])\n",
    "        edge[1][1] = int(edge[1][1])\n",
    "        edge[1][2] = float(edge[1][2])\n",
    "        \n",
    "        user_count[edge[1][0]] += 1\n",
    "        \n",
    "        node2 = Node(edge[1][0],edge[1][1],edge[1][2])\n",
    "        \n",
    "        if edge[0][1][0] != 'R':\n",
    "            edge[0][0] = int(edge[0][0])\n",
    "            edge[0][1] = int(edge[0][1])\n",
    "            edge[0][2] = float(edge[0][2])\n",
    "            \n",
    "#             print(edge)\n",
    "            \n",
    "            node1 = Node(edge[0][0],edge[0][1],edge[0][2])\n",
    "            insertnode(tree,root,node1,node2)\n",
    "            tree.nodes += 1\n",
    "            \n",
    "        else:\n",
    "            root = node2\n",
    "            nodes[node2.uid] = node2\n",
    "            tree = Tree(root)\n",
    "            tree.nodes += 1\n",
    "    \n",
    "    return tree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "def check_children(node,h):\n",
    "    branching_factor = []\n",
    "#     print(node.uid)\n",
    "    \n",
    "    for key in node.children:\n",
    "#         print(node.branching)\n",
    "        child = node.children[key]\n",
    "        if child.branching > 0:\n",
    "            branching_factor.append(1)\n",
    "            check_children(child,h)\n",
    "            \n",
    "        else:\n",
    "            branching_factor.append(0)\n",
    "            \n",
    "    branching_factor = np.asarray(branching_factor)\n",
    "    \n",
    "    if np.any(branching_factor):\n",
    "        h += 1\n",
    "        \n",
    "    return h"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_treeheight(tree):\n",
    "    h = check_children(tree.root,2)\n",
    "    return h"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c3f85e5a0875495bbde35ccdeeeaf16a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=1490), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "516371936604352513.txt\n",
      "766808183696351233.txt\n",
      "519868410599993344.txt\n",
      "514517213022543872.txt\n",
      "531607884220485632.txt\n",
      "523123779124600833.txt\n",
      "693729678306189313.txt\n",
      "693631772639145984.txt\n",
      "745365403237376000.txt\n",
      "689719109785907200.txt\n",
      "550175324634619904.txt\n",
      "528822281972498432.txt\n",
      "689722724470620161.txt\n",
      "538975342011363328.txt\n",
      "752965545528528898.txt\n",
      "519929168864497664.txt\n",
      "715255507506892800.txt\n",
      "\n"
     ]
    }
   ],
   "source": [
    "twitter15_trees = []\n",
    "for treefile in tqdm(treefiles):\n",
    "    try:\n",
    "        tree = file2tree(datapath,treefile)\n",
    "        tree.height = get_treeheight(tree)\n",
    "\n",
    "        twitter15_trees.append(tree)\n",
    "    except:\n",
    "        print(treefile)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Basic Stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "avght = 0\n",
    "avgnodes = 0\n",
    "for tree in twitter15_trees:\n",
    "    avght += tree.height\n",
    "    avgnodes += tree.nodes\n",
    "avght = avght/len(twitter15_trees)\n",
    "avgnodes = avgnodes/len(twitter15_trees)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2.9966055668703326"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "avght"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "417.04684317718943"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "avgnodes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Scraping User Info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5dee991ce1624085ad617f12ee1f53e0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=474469), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "ename": "TweepError",
     "evalue": "[{'code': 50, 'message': 'User not found.'}]",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTweepError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-41-3608de22ff1f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muser_count\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0muser_data\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m         \u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mvect\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_userinfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      4\u001b[0m         \u001b[0muser_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m         \u001b[0muser_features\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvect\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-40-c6e574376c65>\u001b[0m in \u001b[0;36mget_userinfo\u001b[0;34m(uid)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_userinfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m     \u001b[0muser_vect\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m     \u001b[0muser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mapi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_user\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      4\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0muser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfriends_count\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m         \u001b[0mratio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0muser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfollowers_count\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0muser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfriends_count\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/envs/general/lib/python3.6/site-packages/tweepy/binder.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    248\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    249\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 250\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    251\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    252\u001b[0m     \u001b[0;31m# Set pagination mode\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/envs/general/lib/python3.6/site-packages/tweepy/binder.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    232\u001b[0m                     \u001b[0;32mraise\u001b[0m \u001b[0mRateLimitError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merror_msg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    233\u001b[0m                 \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 234\u001b[0;31m                     \u001b[0;32mraise\u001b[0m \u001b[0mTweepError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merror_msg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mapi_code\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mapi_error_code\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    235\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    236\u001b[0m             \u001b[0;31m# Parse the response payload\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mTweepError\u001b[0m: [{'code': 50, 'message': 'User not found.'}]"
     ]
    }
   ],
   "source": [
    "count = 0\n",
    "for key in tqdm(user_count):\n",
    "    try:\n",
    "        if key not in user_data:\n",
    "            data,vect = get_userinfo(key)\n",
    "            user_data[key] = data\n",
    "            user_features[key] = vect\n",
    "    except:\n",
    "        count += 1\n",
    "        print(key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "general",
   "language": "python",
   "name": "general"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
